<!DOCTYPE HTML>
<html lang="zh-CN">


<head>
    <meta charset="utf-8">
    <meta name="keywords" content="爬虫(爬取b站), 博客">
    <meta name="description" content="In me the tiger sniffs the rose.">
    <meta http-equiv="X-UA-Compatible" content="IE=edge">
    <!-- Zoom is deliberately left enabled: user-scalable=no prevents pinch-zoom. -->
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="renderer" content="webkit|ie-stand|ie-comp">
    <meta name="mobile-web-app-capable" content="yes">
    <meta name="format-detection" content="telephone=no">
    <meta name="apple-mobile-web-app-capable" content="yes">
    <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
    <!-- Global site tag (gtag.js) - Google Analytics -->

    <title>爬虫(爬取b站) | 凡诚</title>
    <link rel="icon" type="image/png" href="/favicon.png">

    <link rel="stylesheet" type="text/css" href="/libs/awesome/css/all.css">
    <link rel="stylesheet" type="text/css" href="/libs/materialize/materialize.min.css">
    <link rel="stylesheet" type="text/css" href="/libs/aos/aos.css">
    <link rel="stylesheet" type="text/css" href="/libs/animate/animate.min.css">
    <link rel="stylesheet" type="text/css" href="/libs/lightGallery/css/lightgallery.min.css">
    <link rel="stylesheet" type="text/css" href="/css/matery.css">
    <link rel="stylesheet" type="text/css" href="/css/my.css">
    <script src="https://sdk.jinrishici.com/v2/browser/jinrishici.js" charset="utf-8"></script>
    <script src="/libs/jquery/jquery.min.js"></script>

    <meta name="generator" content="Hexo 5.4.2">

    <!-- Fixed full-page background image. Kept inside <head>, after the
         stylesheets above, so its position in the cascade is unchanged. -->
    <style>
        body {
            background-image: url(https://cdn.jsdelivr.net/gh/Tokisaki-Galaxy/res/site/medias/background.jpg);
            background-repeat: no-repeat;
            background-size: cover;
            background-attachment: fixed;
        }
    </style>
</head>



<body>
    <header class="navbar-fixed">
    <nav id="headNav" class="bg-color nav-transparent">
        <div id="navContainer" class="nav-wrapper container">
            <div class="brand-logo">
                <a href="/" class="waves-effect waves-light">
                    
                    <img src="/medias/comment_bg.png" class="logo-img" alt="LOGO">
                    
                    <span class="logo-span">凡诚</span>
                </a>
            </div>
            

<!-- Icon-only trigger targeting #mobile-nav; aria-label gives it an accessible name. -->
<a href="#" data-target="mobile-nav" class="sidenav-trigger button-collapse" aria-label="打开导航菜单"><i class="fas fa-bars" aria-hidden="true"></i></a>
<ul class="right nav-menu">
  <!-- Primary navigation links. -->
  <li class="hide-on-med-and-down nav-item">
    <a href="/" class="waves-effect waves-light">
      <i class="fas fa-home" style="zoom: 0.6;"></i>
      <span>主页</span>
    </a>
  </li>

  <li class="hide-on-med-and-down nav-item">
    <a href="/tags" class="waves-effect waves-light">
      <i class="fas fa-tags" style="zoom: 0.6;"></i>
      <span>文章</span>
    </a>
  </li>

  <li class="hide-on-med-and-down nav-item">
    <a href="/categories" class="waves-effect waves-light">
      <i class="fas fa-bookmark" style="zoom: 0.6;"></i>
      <span>分类</span>
    </a>
  </li>

  <li class="hide-on-med-and-down nav-item">
    <a href="/archives" class="waves-effect waves-light">
      <i class="fas fa-archive" style="zoom: 0.6;"></i>
      <span>归档</span>
    </a>
  </li>

  <!-- "About me" entry with a nested sub-menu. The parent link used to carry
       an empty href, which reloads the current page when clicked; it now
       points at the first sub-menu destination. -->
  <li class="hide-on-med-and-down nav-item">
    <a href="/about" class="waves-effect waves-light">
      <i class="fas fa-user-circle" style="zoom: 0.6;"></i>
      <span>关于我</span>
      <i class="fas fa-chevron-down" aria-hidden="true" style="zoom: 0.6;"></i>
    </a>
    <ul class="sub-nav menus_item_child">
      <li>
        <a href="/about">
          <i class="fas fa-user-circle" style="margin-top: -20px; zoom: 0.6;"></i>
          <span>我的信息</span>
        </a>
      </li>
      <li>
        <a href="/contact">
          <i class="fas fa-comments" style="margin-top: -20px; zoom: 0.6;"></i>
          <span>留言板</span>
        </a>
      </li>
      <li>
        <a href="/friends">
          <i class="fas fa-address-book" style="margin-top: -20px; zoom: 0.6;"></i>
          <span>友链</span>
        </a>
      </li>
    </ul>
  </li>

  <!-- Icon-only link to #searchModal; aria-label supplies the accessible name. -->
  <li>
    <a href="#searchModal" class="modal-trigger waves-effect waves-light" aria-label="搜索">
      <i id="searchIcon" class="fas fa-search" title="Search" style="zoom: 0.85;"></i>
    </a>
  </li>
</ul>


<!-- Side navigation referenced by the data-target="mobile-nav" trigger. -->
<div id="mobile-nav" class="side-nav sidenav">

    <div class="mobile-head bg-color">
        <!-- Decorative logo: the site name follows as text, so alt is empty. -->
        <img src="/medias/comment_bg.png" class="logo-img circle responsive-img" alt="">
        <div class="logo-name">凡诚</div>
        <div class="logo-desc">
            In me the tiger sniffs the rose.
        </div>
    </div>

    <ul class="menu-list mobile-menu-list">
        <li class="m-nav-item">
            <a href="/" class="waves-effect waves-light">
                <i class="fa-fw fas fa-home"></i>
                主页
            </a>
        </li>

        <li class="m-nav-item">
            <a href="/tags" class="waves-effect waves-light">
                <i class="fa-fw fas fa-tags"></i>
                文章
            </a>
        </li>

        <li class="m-nav-item">
            <a href="/categories" class="waves-effect waves-light">
                <i class="fa-fw fas fa-bookmark"></i>
                分类
            </a>
        </li>

        <li class="m-nav-item">
            <a href="/archives" class="waves-effect waves-light">
                <i class="fa-fw fas fa-archive"></i>
                归档
            </a>
        </li>

        <li class="m-nav-item">
            <!-- NOTE(review): presumably a theme script toggles the sub-menu
                 from this link; confirm before replacing it with a <button>. -->
            <a href="javascript:;">
                <i class="fa-fw fas fa-user-circle"></i>
                关于我
                <span class="m-icon"><i class="fas fa-chevron-right"></i></span>
            </a>
            <!-- Sub-menu. The empty style="background: ;" attribute and the
                 trailing spaces inside the href values were removed. -->
            <ul>
                <li>
                    <a href="/about" style="margin-left:75px">
                        <i class="fa fas fa-user-circle" style="position: absolute;left:50px"></i>
                        <span>我的信息</span>
                    </a>
                </li>

                <li>
                    <a href="/contact" style="margin-left:75px">
                        <i class="fa fas fa-comments" style="position: absolute;left:50px"></i>
                        <span>留言板</span>
                    </a>
                </li>

                <li>
                    <a href="/friends" style="margin-left:75px">
                        <i class="fa fas fa-address-book" style="position: absolute;left:50px"></i>
                        <span>友链</span>
                    </a>
                </li>
            </ul>
        </li>
    </ul>
</div>


        </div>

        
    </nav>

</header>

    



<!-- Post banner: cover image as background with the post title centred on it. -->
<div class="bg-cover pd-header post-cover" style="background-image: url('/medias/featureimages/7.jpg')">
  <div class="container" style="right: 0px;left: 0px;">
    <div class="row">
      <div class="col s12 m12 l12">
        <div class="brand">
          <h1 class="description center-align post-title">爬虫(爬取b站)</h1>
        </div>
      </div>
    </div>
  </div>
</div>




<main class="post-container content">

    
    <link rel="stylesheet" href="/libs/tocbot/tocbot.css">
<style>
    /* Invisible 100px spacer pulled up by a matching negative margin above
       every article heading; presumably so in-page anchor jumps land below
       the fixed navbar. */
    #articleContent h1::before,
    #articleContent h2::before,
    #articleContent h3::before,
    #articleContent h4::before,
    #articleContent h5::before,
    #articleContent h6::before {
        display: block;
        content: " ";
        height: 100px;
        margin-top: -100px;
        visibility: hidden;
    }

    /* Suppress the focus ring only when the browser would not show one for
       keyboard users; keyboard focus stays visible. */
    #articleContent :focus:not(:focus-visible) {
        outline: none;
    }

    /* Table-of-contents widget. */
    .toc-fixed {
        position: fixed;
        top: 64px;
    }

    .toc-widget {
        width: 345px;
        padding-left: 20px;
    }

    .toc-widget .toc-title {
        padding: 35px 0 15px 17px;
        font-size: 1.5rem;
        font-weight: bold;
        line-height: 1.5rem;
    }

    .toc-widget ol {
        padding: 0;
        list-style: none;
    }

    #toc-content {
        padding-bottom: 30px;
        overflow: auto;
    }

    #toc-content ol {
        padding-left: 10px;
    }

    #toc-content ol li {
        padding-left: 10px;
    }

    #toc-content .toc-link:hover {
        color: #42b983;
        font-weight: 700;
        text-decoration: underline;
    }

    #toc-content .toc-link::before {
        background-color: transparent;
        max-height: 25px;

        position: absolute;
        right: 23.5vw;
        display: block;
    }

    #toc-content .is-active-link {
        color: #42b983;
    }

    /* Floating button pinned to the bottom-right corner. */
    #floating-toc-btn {
        position: fixed;
        right: 15px;
        bottom: 76px;
        padding-top: 15px;
        margin-bottom: 0;
        z-index: 998;
    }

    #floating-toc-btn .btn-floating {
        width: 48px;
        height: 48px;
    }

    #floating-toc-btn .btn-floating i {
        line-height: 48px;
        font-size: 1.4rem;
    }
</style>
<div class="row">
    <div id="main-content" class="col s12 m12 l9">
        <!-- 文章内容详情 -->
<div id="artDetail">
    <div class="card">
        <div class="card-content article-info">
            <!-- Tag chips on the left, category link right-aligned. -->
            <div class="row tag-cate">
                <div class="col s7">
                    <div class="article-tag">
                        <a href="/tags/python/">
                            <span class="chip bg-color">python</span>
                        </a>
                        <a href="/tags/%E7%88%AC%E8%99%AB/">
                            <span class="chip bg-color">爬虫</span>
                        </a>
                    </div>
                </div>
                <div class="col s5 right-align">
                    <div class="post-cate">
                        <i class="fas fa-bookmark fa-fw icon-category"></i>
                        <a href="/categories/python/" class="post-category">
                            python
                        </a>
                    </div>
                </div>
            </div>

            <!-- Post metadata. Dates are wrapped in <time> so they are
                 machine-readable; the visible text is unchanged. -->
            <div class="post-info">
                <div class="post-date info-break-policy">
                    <i class="far fa-calendar-minus fa-fw"></i>Publish Date:&nbsp;&nbsp;
                    <time datetime="2019-03-11">2019-03-11</time>
                </div>

                <div class="post-date info-break-policy">
                    <i class="far fa-calendar-check fa-fw"></i>Update Date:&nbsp;&nbsp;
                    <time datetime="2022-11-08">2022-11-08</time>
                </div>

                <div class="info-break-policy">
                    <i class="far fa-file-word fa-fw"></i>Word Count:&nbsp;&nbsp;
                    2.2k
                </div>

                <div class="info-break-policy">
                    <i class="far fa-clock fa-fw"></i>Read Times:&nbsp;&nbsp;
                    10 Min
                </div>

                <!-- Page-view counter; the empty span is presumably filled in
                     by a script that is not visible in this part of the file. -->
                <div id="busuanzi_container_page_pv" class="info-break-policy">
                    <i class="far fa-eye fa-fw"></i>Read Count:&nbsp;&nbsp;
                    <span id="busuanzi_value_page_pv"></span>
                </div>
            </div>
        </div>
        <hr class="clearfix">

        
        <!-- Stylesheet for the bundled Prism.js syntax highlighter. -->
        <link rel="stylesheet" href="/libs/prism/prism.css">
        

        

        <div class="card-content article-card-content">
            <div id="articleContent">
                <h1 id="python爬虫实践"><a href="#python爬虫实践" class="headerlink" title="python爬虫实践"></a>python爬虫实践</h1><h2 id="一-爬取bilibili网站每日排行榜"><a href="#一-爬取bilibili网站每日排行榜" class="headerlink" title="一.爬取bilibili网站每日排行榜"></a>一.爬取bilibili网站每日排行榜</h2><blockquote>
<p>思路：获取网站源码 –&gt; 解析源码，提取所需标签内容 –&gt; 爬取内容保存到excel表格中 –&gt; 将获得数据保存到sql数据库</p>
</blockquote>
<h3 id="1-获取网站源码"><a href="#1-获取网站源码" class="headerlink" title="1.获取网站源码"></a>1.获取网站源码</h3><blockquote>
<p>get请求思路：分析网页(f12) –&gt; 引入模块 –&gt; 模拟打开网页（传递发送数据） –&gt; 接受数据</p>
<p>post请求思路：分析网页(f12) –&gt; 引入模块 –&gt; 封装信息 –&gt; 模拟打开网页（传递发送数据） –&gt; 接受数据</p>
</blockquote>
<pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token comment">#解析源码函数，接受一个网址，返回包含源码的列表</span>
<span class="token keyword">def</span> <span class="token function">getHtml</span><span class="token punctuation">(</span>url<span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">try</span><span class="token punctuation">:</span>

        header <span class="token operator">=</span> <span class="token punctuation">&#123;</span>
            <span class="token string">"user-agent"</span><span class="token punctuation">:</span> <span class="token string">"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36"</span><span class="token punctuation">&#125;</span>

        <span class="token comment"># 封装二进制对象</span>
        data <span class="token operator">=</span> <span class="token builtin">bytes</span><span class="token punctuation">(</span>urllib<span class="token punctuation">.</span>parse<span class="token punctuation">.</span>urlencode<span class="token punctuation">(</span><span class="token punctuation">&#123;</span><span class="token string">"awsl"</span><span class="token punctuation">:</span> <span class="token string">"awsl"</span><span class="token punctuation">&#125;</span><span class="token punctuation">)</span><span class="token punctuation">,</span> encoding<span class="token operator">=</span><span class="token string">"utf-8"</span><span class="token punctuation">)</span>

        <span class="token comment"># 封装url对象</span>
        re <span class="token operator">=</span> urllib<span class="token punctuation">.</span>request<span class="token punctuation">.</span>Request<span class="token punctuation">(</span>url<span class="token operator">=</span>url<span class="token punctuation">,</span> headers<span class="token operator">=</span>header<span class="token punctuation">)</span>

        <span class="token comment"># 获取网页源码</span>
        res <span class="token operator">=</span> urllib<span class="token punctuation">.</span>request<span class="token punctuation">.</span>urlopen<span class="token punctuation">(</span>re<span class="token punctuation">,</span> timeout<span class="token operator">=</span><span class="token number">1</span><span class="token punctuation">)</span>

        <span class="token comment"># 打印源码</span>
        <span class="token comment"># print(res.read().decode("utf-8"))</span>
    <span class="token keyword">except</span> urllib<span class="token punctuation">.</span>error<span class="token punctuation">.</span>URLError <span class="token keyword">as</span> e<span class="token punctuation">:</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"访问超时"</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经获得指定网站源码！"</span><span class="token punctuation">)</span>
    <span class="token keyword">return</span> res<span class="token punctuation">.</span>read<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">.</span>decode<span class="token punctuation">(</span><span class="token string">"utf-8"</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>





<h3 id="2-解析源码，提取所需标签内容"><a href="#2-解析源码，提取所需标签内容" class="headerlink" title="2.解析源码，提取所需标签内容"></a>2.解析源码，提取所需标签内容</h3><blockquote>
<p>思路：引入模块 –&gt; 解析源码 –&gt; 获得需求信息的标签 –&gt; 设定正则判断规则 –&gt; 获得标签中需求的内容  –&gt; 封装内容</p>
</blockquote>
<pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token comment">#获得指定标签内的内容,接受源码列表，输出内容列表</span>
<span class="token keyword">def</span> <span class="token function">getText</span><span class="token punctuation">(</span>html<span class="token punctuation">)</span><span class="token punctuation">:</span>

    <span class="token comment">#返回内容的列表</span>
    list_s <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>
    <span class="token comment"># 使用BeautifulSoup解析源码</span>
    bs <span class="token operator">=</span> BeautifulSoup<span class="token punctuation">(</span>html<span class="token punctuation">,</span> <span class="token string">"html.parser"</span><span class="token punctuation">)</span>

    <span class="token comment">#依次解析获得内容</span>
    <span class="token keyword">for</span> item <span class="token keyword">in</span> bs<span class="token punctuation">.</span>find_all<span class="token punctuation">(</span><span class="token string">'li'</span><span class="token punctuation">,</span>class_<span class="token operator">=</span><span class="token string">'rank-item'</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
        data <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>
        item <span class="token operator">=</span> <span class="token builtin">str</span><span class="token punctuation">(</span>item<span class="token punctuation">)</span>

        <span class="token comment">#获得排名</span>
        pm <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findpm<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>pm<span class="token punctuation">)</span>
        <span class="token comment"># 获得名字</span>
        name <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findName<span class="token punctuation">,</span>item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        name <span class="token operator">=</span> name<span class="token punctuation">.</span>replace<span class="token punctuation">(</span><span class="token string">"\'"</span><span class="token punctuation">,</span><span class="token string">'\"'</span><span class="token punctuation">)</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>name<span class="token punctuation">)</span>

        <span class="token comment"># 获得番号</span>
        fh <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findfh<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>fh<span class="token punctuation">)</span>

        <span class="token comment">#获得播放地址</span>
        dz <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>finddz<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>dz<span class="token punctuation">)</span>

        <span class="token comment"># 获得播放量</span>
        bfl <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findbfl<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        bfl <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">r"\n"</span><span class="token punctuation">,</span><span class="token string">""</span><span class="token punctuation">,</span>bfl<span class="token punctuation">)</span>
        bfl <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">" "</span><span class="token punctuation">,</span> <span class="token string">""</span><span class="token punctuation">,</span> bfl<span class="token punctuation">)</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>bfl<span class="token punctuation">)</span>

        <span class="token comment"># 获得弹幕数</span>
        dms <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>finddms<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        dms <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">" "</span><span class="token punctuation">,</span><span class="token string">""</span><span class="token punctuation">,</span>dms<span class="token punctuation">)</span>
        dms <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">r"\n"</span><span class="token punctuation">,</span> <span class="token string">""</span><span class="token punctuation">,</span>dms<span class="token punctuation">)</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>dms<span class="token punctuation">)</span>

        <span class="token comment"># 获得制作方</span>
        zzf <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findzzf<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        zzf <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">" "</span><span class="token punctuation">,</span> <span class="token string">""</span><span class="token punctuation">,</span> zzf<span class="token punctuation">)</span>
        zzf <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">r"\n"</span><span class="token punctuation">,</span> <span class="token string">""</span><span class="token punctuation">,</span> zzf<span class="token punctuation">)</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>zzf<span class="token punctuation">)</span>

        <span class="token comment"># 获得综合分数</span>
        fs <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findfs<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>fs<span class="token punctuation">)</span>
        list_s<span class="token punctuation">.</span>append<span class="token punctuation">(</span>data<span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经获得指定标签内容！"</span><span class="token punctuation">)</span>
    <span class="token keyword">return</span> list_s<span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>







<h3 id="3-爬取内容保存到excel表格中"><a href="#3-爬取内容保存到excel表格中" class="headerlink" title="3.爬取内容保存到excel表格中"></a>3.爬取内容保存到excel表格中</h3><blockquote>
<p>思路：创建表格 –&gt; 创建工作表 –&gt; 写入数据 –&gt; 关闭保存</p>
</blockquote>
<pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token comment">#将爬取内容保存到excel表格中，接受内容列表，生成excel表格</span>
<span class="token keyword">def</span> <span class="token function">getExcel</span><span class="token punctuation">(</span><span class="token builtin">list</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    list_b <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">'排名'</span><span class="token punctuation">,</span><span class="token string">'视频名称'</span><span class="token punctuation">,</span><span class="token string">'视频番号'</span><span class="token punctuation">,</span><span class="token string">'播放地址'</span><span class="token punctuation">,</span><span class="token string">'观看次数'</span><span class="token punctuation">,</span><span class="token string">'弹幕数量'</span><span class="token punctuation">,</span><span class="token string">'制作者'</span><span class="token punctuation">,</span><span class="token string">'综合分数'</span><span class="token punctuation">]</span>
    list_n <span class="token operator">=</span> <span class="token builtin">list</span>
    <span class="token comment">#创建一个表格</span>
    workbook <span class="token operator">=</span> xlwt<span class="token punctuation">.</span>Workbook<span class="token punctuation">(</span>encoding<span class="token operator">=</span><span class="token string">"utf-8"</span><span class="token punctuation">)</span>
    <span class="token comment">#创建一个工作表</span>
    worksheet <span class="token operator">=</span> workbook<span class="token punctuation">.</span>add_sheet<span class="token punctuation">(</span><span class="token string">"哔哩哔哩排行榜"</span><span class="token punctuation">)</span>
    <span class="token comment">#写入数据</span>
    <span class="token keyword">for</span> i <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span><span class="token number">8</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
        <span class="token comment">#写入表头</span>
        worksheet<span class="token punctuation">.</span>write<span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span>i<span class="token punctuation">,</span>list_b<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span>

    <span class="token keyword">for</span> i <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">100</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
        <span class="token keyword">for</span> j <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">8</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
            <span class="token comment">#写入内容</span>
            worksheet<span class="token punctuation">.</span>write<span class="token punctuation">(</span>i<span class="token operator">+</span><span class="token number">1</span><span class="token punctuation">,</span>j<span class="token punctuation">,</span> list_n<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">[</span>j<span class="token punctuation">]</span><span class="token punctuation">)</span>

    <span class="token comment">#关闭并保存</span>
    workbook<span class="token punctuation">.</span>save<span class="token punctuation">(</span><span class="token string">"bilibili每日排行.xls"</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经将指定内容保存于excel表格中！"</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>







<h3 id="4-将获得数据保存到sql数据库"><a href="#4-将获得数据保存到sql数据库" class="headerlink" title="4.将获得数据保存到sql数据库"></a>4.将获得数据保存到sql数据库</h3><blockquote>
<p>思路：创建或打开数据库 –&gt; 创建游标  –&gt; 将sql语句交给游标执行 –&gt;  提交数据库操作 –&gt; 退出并关闭数据库</p>
</blockquote>
<pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token comment">#将获得数据保存到sql数据库,接收数据库名，表单名，数据列表</span>
<span class="token keyword">def</span> <span class="token function">getSql</span><span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>table_name<span class="token punctuation">,</span><span class="token builtin">list</span><span class="token punctuation">)</span><span class="token punctuation">:</span>

    <span class="token comment">#创建表，并添加表头</span>
    createsql<span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>table_name<span class="token punctuation">)</span>

    <span class="token comment">#将爬取 的数据列表放入数据库</span>
    <span class="token keyword">for</span> item <span class="token keyword">in</span> <span class="token builtin">list</span><span class="token punctuation">:</span>
        sql <span class="token operator">=</span> <span class="token triple-quoted-string string">'''
            insert into %s values ('%s','%s','%s','%s','%s','%s','%s','%s')
        '''</span><span class="token operator">%</span><span class="token punctuation">(</span>table_name<span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">3</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">4</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">6</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">7</span><span class="token punctuation">]</span><span class="token punctuation">)</span>

        intosql<span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>sql<span class="token punctuation">)</span>

    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经获得数据保存到sql数据库"</span><span class="token punctuation">)</span>



<span class="token comment">#选定数据库，执行sql语句，接收数据库名，sql语句</span>
<span class="token keyword">def</span> <span class="token function">intosql</span><span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>sql<span class="token punctuation">)</span><span class="token punctuation">:</span>

    <span class="token comment">#创建或者打开数据库</span>
    conn <span class="token operator">=</span> sqlite3<span class="token punctuation">.</span>connect<span class="token punctuation">(</span>dbpath<span class="token punctuation">)</span>

    <span class="token comment">#创建游标</span>
    c <span class="token operator">=</span> conn<span class="token punctuation">.</span>cursor<span class="token punctuation">(</span><span class="token punctuation">)</span>

    <span class="token keyword">try</span><span class="token punctuation">:</span>
        <span class="token comment">#使用游标执行语句</span>
        c<span class="token punctuation">.</span>execute<span class="token punctuation">(</span>sql<span class="token punctuation">)</span>

        <span class="token comment">#提交数据库操作</span>
        conn<span class="token punctuation">.</span>commit<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">except</span> Exception<span class="token punctuation">:</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"执行sql语句失败"</span><span class="token punctuation">)</span>
    <span class="token keyword">finally</span><span class="token punctuation">:</span>
        <span class="token comment">#关闭数据库</span>
        conn<span class="token punctuation">.</span>close<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经执行sql语句！"</span><span class="token punctuation">)</span>


    
<span class="token comment">#创建数据库，添加表头，接收数据库名，表单名</span>
<span class="token keyword">def</span> <span class="token function">createsql</span><span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>table_name<span class="token punctuation">)</span><span class="token punctuation">:</span>
    list_id <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">'list'</span><span class="token punctuation">,</span><span class="token string">'name'</span><span class="token punctuation">,</span><span class="token string">'id'</span><span class="token punctuation">,</span><span class="token string">'location'</span><span class="token punctuation">,</span><span class="token string">'number'</span><span class="token punctuation">,</span><span class="token string">'barrage'</span><span class="token punctuation">,</span><span class="token string">'maker'</span><span class="token punctuation">,</span><span class="token string">'mark'</span><span class="token punctuation">]</span>
    sql <span class="token operator">=</span> <span class="token triple-quoted-string string">'''
        create table %s(
            %s varchar ,
            %s varchar,
            %s varchar,
            %s varchar,
            %s varchar ,
            %s varchar,
            %s varchar,
            %s varchar 
        )
    '''</span><span class="token operator">%</span><span class="token punctuation">(</span>table_name<span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">3</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">4</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">6</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">7</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
    <span class="token comment"># 创建或者打开数据库</span>
    conn <span class="token operator">=</span> sqlite3<span class="token punctuation">.</span>connect<span class="token punctuation">(</span>dbpath<span class="token punctuation">)</span>
    <span class="token comment"># 创建游标</span>
    c <span class="token operator">=</span> conn<span class="token punctuation">.</span>cursor<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">try</span><span class="token punctuation">:</span>
        <span class="token comment"># 使用游标执行语句</span>
        c<span class="token punctuation">.</span>execute<span class="token punctuation">(</span>sql<span class="token punctuation">)</span>

        <span class="token comment"># 提交数据库操作</span>
        conn<span class="token punctuation">.</span>commit<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">except</span> Exception<span class="token punctuation">:</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"执行sql语句失败"</span><span class="token punctuation">)</span>
    <span class="token keyword">finally</span><span class="token punctuation">:</span>
        <span class="token comment"># 关闭数据库</span>
        conn<span class="token punctuation">.</span>close<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经创建指定数据库，添加表头！"</span><span class="token punctuation">)</span><span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>





<h3 id="5-总实践代码"><a href="#5-总实践代码" class="headerlink" title="5.总实践代码"></a>5.总实践代码</h3><pre class="line-numbers language-python" data-language="python"><code class="language-python"><span class="token comment"># -*- coding: utf-8 -*-</span>
<span class="token comment">#@Time: 2021/2/22 19:33</span>
<span class="token comment">#@Name: 凡诚</span>
<span class="token comment">#@File：bilibili</span>
<span class="token comment">#@Software PyCharm</span>

<span class="token keyword">import</span> urllib<span class="token punctuation">.</span>request<span class="token punctuation">,</span>urllib<span class="token punctuation">.</span>parse
<span class="token keyword">import</span> re
<span class="token keyword">from</span> bs4 <span class="token keyword">import</span> BeautifulSoup
<span class="token keyword">import</span> xlwt
<span class="token keyword">import</span> sqlite3

<span class="token comment">#爬取网页地址</span>
url <span class="token operator">=</span> <span class="token string">"https://www.bilibili.com/v/popular/rank/all"</span>

<span class="token comment">##正则表达式判断规则：</span>
<span class="token comment">#获得排名</span>
findpm <span class="token operator">=</span> re<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">r'data-rank="(\d*)">&lt;div'</span><span class="token punctuation">)</span>
<span class="token comment">#获得名字</span>
findName <span class="token operator">=</span> re<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">r'href=".*" target="_blank">(.*)&lt;/a> &lt;!-- --> '</span><span class="token punctuation">)</span>
<span class="token comment">#获得番号</span>
findfh <span class="token operator">=</span> re<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">r'li class="rank-item" data-id="(.*)" data-rank'</span><span class="token punctuation">)</span>
<span class="token comment">#获得播放地址</span>
finddz <span class="token operator">=</span> re<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">r'a href="//(.*)" target="_blank">&lt;img'</span><span class="token punctuation">)</span>
<span class="token comment">#获得播放量</span>
findbfl <span class="token operator">=</span> re<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">r'&lt;i class="b-icon play">&lt;/i>\n(.*)&lt;/span> &lt;span class="data-box">'</span><span class="token punctuation">,</span>re<span class="token punctuation">.</span>S<span class="token punctuation">)</span>
<span class="token comment">#获得弹幕数</span>
finddms <span class="token operator">=</span> re<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">r'&lt;/span> &lt;span class="data-box">&lt;i class="b-icon view">&lt;/i>(.*)&lt;/span> &lt;a'</span><span class="token punctuation">,</span>re<span class="token punctuation">.</span>S<span class="token punctuation">)</span>
<span class="token comment">#获得制作方</span>
findzzf <span class="token operator">=</span> re<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">r'&lt;span class="data-box up-name">&lt;i class="b-icon author">&lt;/i>(.*)&lt;/span>&lt;/a>&lt;/div> &lt;div class="pts">'</span><span class="token punctuation">,</span>re<span class="token punctuation">.</span>S<span class="token punctuation">)</span>
<span class="token comment">#获得综合分数</span>
findfs <span class="token operator">=</span> re<span class="token punctuation">.</span><span class="token builtin">compile</span><span class="token punctuation">(</span><span class="token string">r'&lt;div>(\d*)&lt;/div>综合得分'</span><span class="token punctuation">)</span>



<span class="token comment">#获得指定标签内的内容</span>
<span class="token keyword">def</span> <span class="token function">getText</span><span class="token punctuation">(</span>html<span class="token punctuation">)</span><span class="token punctuation">:</span>

    <span class="token comment">#返回内容的列表</span>
    list_s <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>
    <span class="token comment"># 使用BeautifulSoup解析源码</span>
    bs <span class="token operator">=</span> BeautifulSoup<span class="token punctuation">(</span>html<span class="token punctuation">,</span> <span class="token string">"html.parser"</span><span class="token punctuation">)</span>

    <span class="token comment">#依次解析获得内容</span>
    <span class="token keyword">for</span> item <span class="token keyword">in</span> bs<span class="token punctuation">.</span>find_all<span class="token punctuation">(</span><span class="token string">'li'</span><span class="token punctuation">,</span>class_<span class="token operator">=</span><span class="token string">'rank-item'</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
        data <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>
        item <span class="token operator">=</span> <span class="token builtin">str</span><span class="token punctuation">(</span>item<span class="token punctuation">)</span>

        <span class="token comment">#获得排名</span>
        pm <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findpm<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>pm<span class="token punctuation">)</span>
        <span class="token comment"># 获得名字</span>
        name <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findName<span class="token punctuation">,</span>item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        name <span class="token operator">=</span> name<span class="token punctuation">.</span>replace<span class="token punctuation">(</span><span class="token string">"\'"</span><span class="token punctuation">,</span><span class="token string">'\"'</span><span class="token punctuation">)</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>name<span class="token punctuation">)</span>

        <span class="token comment"># 获得番号</span>
        fh <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findfh<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>fh<span class="token punctuation">)</span>

        <span class="token comment">#获得播放地址</span>
        dz <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>finddz<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>dz<span class="token punctuation">)</span>

        <span class="token comment"># 获得播放量</span>
        bfl <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findbfl<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        bfl <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">r"\n"</span><span class="token punctuation">,</span><span class="token string">""</span><span class="token punctuation">,</span>bfl<span class="token punctuation">)</span>
        bfl <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">" "</span><span class="token punctuation">,</span> <span class="token string">""</span><span class="token punctuation">,</span> bfl<span class="token punctuation">)</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>bfl<span class="token punctuation">)</span>

        <span class="token comment"># 获得弹幕数</span>
        dms <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>finddms<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        dms <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">" "</span><span class="token punctuation">,</span><span class="token string">""</span><span class="token punctuation">,</span>dms<span class="token punctuation">)</span>
        dms <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">r"\n"</span><span class="token punctuation">,</span> <span class="token string">""</span><span class="token punctuation">,</span>dms<span class="token punctuation">)</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>dms<span class="token punctuation">)</span>

        <span class="token comment"># 获得制作方</span>
        zzf <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findzzf<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        zzf <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">" "</span><span class="token punctuation">,</span> <span class="token string">""</span><span class="token punctuation">,</span> zzf<span class="token punctuation">)</span>
        zzf <span class="token operator">=</span> re<span class="token punctuation">.</span>sub<span class="token punctuation">(</span><span class="token string">r"\n"</span><span class="token punctuation">,</span> <span class="token string">""</span><span class="token punctuation">,</span> zzf<span class="token punctuation">)</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>zzf<span class="token punctuation">)</span>

        <span class="token comment"># 获得综合分数</span>
        fs <span class="token operator">=</span> re<span class="token punctuation">.</span>findall<span class="token punctuation">(</span>findfs<span class="token punctuation">,</span> item<span class="token punctuation">)</span><span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span>
        data<span class="token punctuation">.</span>append<span class="token punctuation">(</span>fs<span class="token punctuation">)</span>
        list_s<span class="token punctuation">.</span>append<span class="token punctuation">(</span>data<span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经获得指定标签内容！"</span><span class="token punctuation">)</span>
    <span class="token keyword">return</span> list_s



<span class="token comment">#获取指定网页源码</span>
<span class="token keyword">def</span> <span class="token function">getHtml</span><span class="token punctuation">(</span>url<span class="token punctuation">)</span><span class="token punctuation">:</span>
    <span class="token keyword">try</span><span class="token punctuation">:</span>

        header <span class="token operator">=</span> <span class="token punctuation">&#123;</span>
            <span class="token string">"user-agent"</span><span class="token punctuation">:</span> <span class="token string">"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36"</span><span class="token punctuation">&#125;</span>

        <span class="token comment"># 封装二进制对象</span>
        data <span class="token operator">=</span> <span class="token builtin">bytes</span><span class="token punctuation">(</span>urllib<span class="token punctuation">.</span>parse<span class="token punctuation">.</span>urlencode<span class="token punctuation">(</span><span class="token punctuation">&#123;</span><span class="token string">"awsl"</span><span class="token punctuation">:</span> <span class="token string">"awsl"</span><span class="token punctuation">&#125;</span><span class="token punctuation">)</span><span class="token punctuation">,</span> encoding<span class="token operator">=</span><span class="token string">"utf-8"</span><span class="token punctuation">)</span>

        <span class="token comment"># 封装url对象</span>
        re <span class="token operator">=</span> urllib<span class="token punctuation">.</span>request<span class="token punctuation">.</span>Request<span class="token punctuation">(</span>url<span class="token operator">=</span>url<span class="token punctuation">,</span> headers<span class="token operator">=</span>header<span class="token punctuation">)</span>

        <span class="token comment"># 获取网页源码</span>
        res <span class="token operator">=</span> urllib<span class="token punctuation">.</span>request<span class="token punctuation">.</span>urlopen<span class="token punctuation">(</span>re<span class="token punctuation">,</span> timeout<span class="token operator">=</span><span class="token number">1</span><span class="token punctuation">)</span>

        <span class="token comment"># 打印源码</span>
        <span class="token comment"># print(res.read().decode("utf-8"))</span>
    <span class="token keyword">except</span> urllib<span class="token punctuation">.</span>error<span class="token punctuation">.</span>URLError <span class="token keyword">as</span> e<span class="token punctuation">:</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"访问超时"</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经获得指定网站源码！"</span><span class="token punctuation">)</span>
    <span class="token keyword">return</span> res<span class="token punctuation">.</span>read<span class="token punctuation">(</span><span class="token punctuation">)</span><span class="token punctuation">.</span>decode<span class="token punctuation">(</span><span class="token string">"utf-8"</span><span class="token punctuation">)</span>



<span class="token comment">#将爬取内容保存到excel表格中</span>
<span class="token keyword">def</span> <span class="token function">getExcel</span><span class="token punctuation">(</span><span class="token builtin">list</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
    list_b <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">'排名'</span><span class="token punctuation">,</span><span class="token string">'视频名称'</span><span class="token punctuation">,</span><span class="token string">'视频番号'</span><span class="token punctuation">,</span><span class="token string">'播放地址'</span><span class="token punctuation">,</span><span class="token string">'观看次数'</span><span class="token punctuation">,</span><span class="token string">'弹幕数量'</span><span class="token punctuation">,</span><span class="token string">'制作者'</span><span class="token punctuation">,</span><span class="token string">'综合分数'</span><span class="token punctuation">]</span>
    list_n <span class="token operator">=</span> <span class="token builtin">list</span>
    <span class="token comment">#创建一个表格</span>
    workbook <span class="token operator">=</span> xlwt<span class="token punctuation">.</span>Workbook<span class="token punctuation">(</span>encoding<span class="token operator">=</span><span class="token string">"utf-8"</span><span class="token punctuation">)</span>
    <span class="token comment">#创建一个工作表</span>
    worksheet <span class="token operator">=</span> workbook<span class="token punctuation">.</span>add_sheet<span class="token punctuation">(</span><span class="token string">"哔哩哔哩排行榜"</span><span class="token punctuation">)</span>
    <span class="token comment">#写入数据</span>
    <span class="token keyword">for</span> i <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span><span class="token number">8</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
        <span class="token comment">#写入表头</span>
        worksheet<span class="token punctuation">.</span>write<span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span>i<span class="token punctuation">,</span>list_b<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">)</span>

    <span class="token keyword">for</span> i <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">100</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
        <span class="token keyword">for</span> j <span class="token keyword">in</span> <span class="token builtin">range</span><span class="token punctuation">(</span><span class="token number">0</span><span class="token punctuation">,</span> <span class="token number">8</span><span class="token punctuation">)</span><span class="token punctuation">:</span>
            <span class="token comment">#写入内容</span>
            worksheet<span class="token punctuation">.</span>write<span class="token punctuation">(</span>i<span class="token operator">+</span><span class="token number">1</span><span class="token punctuation">,</span>j<span class="token punctuation">,</span> list_n<span class="token punctuation">[</span>i<span class="token punctuation">]</span><span class="token punctuation">[</span>j<span class="token punctuation">]</span><span class="token punctuation">)</span>

    <span class="token comment">#关闭并保存</span>
    workbook<span class="token punctuation">.</span>save<span class="token punctuation">(</span><span class="token string">"bilibili每日排行.xls"</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经将指定内容保存于excel表格中！"</span><span class="token punctuation">)</span>




<span class="token comment">#将获得数据保存到sql数据库</span>
<span class="token keyword">def</span> <span class="token function">getSql</span><span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>table_name<span class="token punctuation">,</span><span class="token builtin">list</span><span class="token punctuation">)</span><span class="token punctuation">:</span>

    <span class="token comment">#创建表，并添加表头</span>
    createsql<span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>table_name<span class="token punctuation">)</span>

    <span class="token comment">#将爬取的数据列表放入数据库</span>
    <span class="token keyword">for</span> item <span class="token keyword">in</span> <span class="token builtin">list</span><span class="token punctuation">:</span>
        sql <span class="token operator">=</span> <span class="token triple-quoted-string string">'''
            insert into %s values ('%s','%s','%s','%s','%s','%s','%s','%s')
        '''</span><span class="token operator">%</span><span class="token punctuation">(</span>table_name<span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">3</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">4</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">6</span><span class="token punctuation">]</span><span class="token punctuation">,</span>item<span class="token punctuation">[</span><span class="token number">7</span><span class="token punctuation">]</span><span class="token punctuation">)</span>

        intosql<span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>sql<span class="token punctuation">)</span>

    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经获得数据保存到sql数据库"</span><span class="token punctuation">)</span>



<span class="token comment">#选定数据库，执行sql语句</span>
<span class="token keyword">def</span> <span class="token function">intosql</span><span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>sql<span class="token punctuation">)</span><span class="token punctuation">:</span>

    <span class="token comment">#创建或者打开数据库</span>
    conn <span class="token operator">=</span> sqlite3<span class="token punctuation">.</span>connect<span class="token punctuation">(</span>dbpath<span class="token punctuation">)</span>

    <span class="token comment">#创建游标</span>
    c <span class="token operator">=</span> conn<span class="token punctuation">.</span>cursor<span class="token punctuation">(</span><span class="token punctuation">)</span>

    <span class="token keyword">try</span><span class="token punctuation">:</span>
        <span class="token comment">#使用游标执行语句</span>
        c<span class="token punctuation">.</span>execute<span class="token punctuation">(</span>sql<span class="token punctuation">)</span>

        <span class="token comment">#提交数据库操作</span>
        conn<span class="token punctuation">.</span>commit<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">except</span> Exception<span class="token punctuation">:</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"执行sql语句失败"</span><span class="token punctuation">)</span>
    <span class="token keyword">finally</span><span class="token punctuation">:</span>
        <span class="token comment">#关闭数据库</span>
        conn<span class="token punctuation">.</span>close<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经执行sql语句！"</span><span class="token punctuation">)</span>


<span class="token comment">#创建数据库，添加表头</span>
<span class="token keyword">def</span> <span class="token function">createsql</span><span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>table_name<span class="token punctuation">)</span><span class="token punctuation">:</span>
    list_id <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token string">'list'</span><span class="token punctuation">,</span><span class="token string">'name'</span><span class="token punctuation">,</span><span class="token string">'id'</span><span class="token punctuation">,</span><span class="token string">'location'</span><span class="token punctuation">,</span><span class="token string">'number'</span><span class="token punctuation">,</span><span class="token string">'barrage'</span><span class="token punctuation">,</span><span class="token string">'maker'</span><span class="token punctuation">,</span><span class="token string">'mark'</span><span class="token punctuation">]</span>
    sql <span class="token operator">=</span> <span class="token triple-quoted-string string">'''
        create table %s(
            %s varchar ,
            %s varchar,
            %s varchar,
            %s varchar,
            %s varchar ,
            %s varchar,
            %s varchar,
            %s varchar 
        )
    '''</span><span class="token operator">%</span><span class="token punctuation">(</span>table_name<span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">0</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">1</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">2</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">3</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">4</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">5</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">6</span><span class="token punctuation">]</span><span class="token punctuation">,</span>list_id<span class="token punctuation">[</span><span class="token number">7</span><span class="token punctuation">]</span><span class="token punctuation">)</span>
    <span class="token comment"># 创建或者打开数据库</span>
    conn <span class="token operator">=</span> sqlite3<span class="token punctuation">.</span>connect<span class="token punctuation">(</span>dbpath<span class="token punctuation">)</span>
    <span class="token comment"># 创建游标</span>
    c <span class="token operator">=</span> conn<span class="token punctuation">.</span>cursor<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">try</span><span class="token punctuation">:</span>
        <span class="token comment"># 使用游标执行语句</span>
        c<span class="token punctuation">.</span>execute<span class="token punctuation">(</span>sql<span class="token punctuation">)</span>

        <span class="token comment"># 提交数据库操作</span>
        conn<span class="token punctuation">.</span>commit<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">except</span> Exception<span class="token punctuation">:</span>
        <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"执行sql语句失败"</span><span class="token punctuation">)</span>
    <span class="token keyword">finally</span><span class="token punctuation">:</span>
        <span class="token comment"># 关闭数据库</span>
        conn<span class="token punctuation">.</span>close<span class="token punctuation">(</span><span class="token punctuation">)</span>
    <span class="token keyword">print</span><span class="token punctuation">(</span><span class="token string">"已经创建指定数据库，添加表头！"</span><span class="token punctuation">)</span>


<span class="token keyword">if</span> __name__ <span class="token operator">==</span> <span class="token string">"__main__"</span><span class="token punctuation">:</span>

    <span class="token builtin">list</span> <span class="token operator">=</span> <span class="token punctuation">[</span><span class="token punctuation">]</span>
    dbpath <span class="token operator">=</span> <span class="token string">"bilibiliTop"</span>
    table_name <span class="token operator">=</span> <span class="token string">"bilibiliTop100"</span>
    <span class="token comment">#获取网页源码</span>
    html <span class="token operator">=</span> getHtml<span class="token punctuation">(</span>url<span class="token punctuation">)</span>

    <span class="token comment">#获取标签内容</span>
    <span class="token builtin">list</span> <span class="token operator">=</span> getText<span class="token punctuation">(</span>html<span class="token punctuation">)</span>

    <span class="token comment">#将获取数据写入excel表格</span>
    <span class="token comment">#getExcel(list)</span>

    <span class="token comment">#将获取数据写入sql数据库</span>
    getSql<span class="token punctuation">(</span>dbpath<span class="token punctuation">,</span>table_name<span class="token punctuation">,</span><span class="token builtin">list</span><span class="token punctuation">)</span>


<span aria-hidden="true" class="line-numbers-rows"><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span
><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span><span></span></span></code></pre>


                
            </div>
            <hr/>

            

    <div class="reprint" id="reprint-statement">
        
            <div class="reprint__author">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-user">
                        Author:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="/about" rel="external nofollow noreferrer">凡诚</a>
                </span>
            </div>
            <div class="reprint__type">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-link">
                        Link:
                    </i>
                </span>
                <span class="reprint-info">
                    <a href="http://example.com/2019/03/11/pythonpc-shi-jian-huo-qu-b-zhan/">http://example.com/2019/03/11/pythonpc-shi-jian-huo-qu-b-zhan/</a>
                </span>
            </div>
            <div class="reprint__notice">
                <span class="reprint-meta" style="font-weight: bold;">
                    <i class="fas fa-copyright">
                        Reprint policy:
                    </i>
                </span>
                <span class="reprint-info">
                    Unless otherwise stated, all articles in this blog are licensed under the
                    <a href="https://creativecommons.org/licenses/by/4.0/deed.zh" rel="external nofollow noreferrer" target="_blank">CC BY 4.0</a>
                    reprint policy. If you reproduce this article, please credit the source:
                    <a href="/about" target="_blank">凡诚</a>
                    !
                </span>
            </div>
        
    </div>

    <script async defer>
      // Whenever the reader copies something, show a toast that points at the
      // reprint policy and offers a "more" button to jump to it.
      document.addEventListener("copy", function () {
        // The action element is a <button>, so it is closed with </button>
        // (it was previously closed with a stray </a>, leaving the tag unbalanced).
        let toastHTML = '<span>Copied successfully, please follow the reprint policy of this article</span><button class="btn-flat toast-action" onclick="navToReprintStatement()" style="font-size: smaller">more</button>';
        M.toast({html: toastHTML})
      });

      // Animate the page scroll (800 ms) to 80px above the #reprint-statement card.
      // Called from the toast button's inline onclick, so it must remain a global.
      function navToReprintStatement() {
        $("html, body").animate({scrollTop: $("#reprint-statement").offset().top - 80}, 800);
      }
    </script>



            <div class="tag_share" style="display: block;">
                <div class="post-meta__tag-list" style="display: inline-block;">
                    
                        <div class="article-tag">
                            
                                <a href="/tags/python/">
                                    <span class="chip bg-color">python</span>
                                </a>
                            
                                <a href="/tags/%E7%88%AC%E8%99%AB/">
                                    <span class="chip bg-color">爬虫</span>
                                </a>
                            
                        </div>
                    
                </div>
                <div class="post_share" style="zoom: 80%; width: fit-content; display: inline-block; float: right; margin: -0.15rem 0;">
                    <link rel="stylesheet" type="text/css" href="/libs/share/css/share.min.css">
<div id="article-share">

    
    <div class="social-share" data-sites="twitter,facebook,google,qq,qzone,wechat,weibo,douban,linkedin" data-wechat-qrcode-helper="<p>微信扫一扫即可分享！</p>"></div>
    <script src="/libs/share/js/social-share.min.js"></script>
    

    

</div>

                </div>
            </div>
            
                <style>
    #reward {
        margin: 40px 0;
        text-align: center;
    }

    #reward .reward-link {
        font-size: 1.4rem;
        line-height: 38px;
    }

    #reward .btn-floating:hover {
        box-shadow: 0 6px 12px rgba(0, 0, 0, 0.2), 0 5px 15px rgba(0, 0, 0, 0.2);
    }

    #rewardModal {
        width: 320px;
        height: 350px;
    }

    #rewardModal .reward-title {
        margin: 15px auto;
        padding-bottom: 5px;
    }

    #rewardModal .modal-content {
        padding: 10px;
    }

    #rewardModal .close {
        position: absolute;
        right: 15px;
        top: 15px;
        color: rgba(0, 0, 0, 0.5);
        font-size: 1.3rem;
        line-height: 20px;
        cursor: pointer;
    }

    #rewardModal .close:hover {
        color: #ef5350;
        transform: scale(1.3);
        -moz-transform:scale(1.3);
        -webkit-transform:scale(1.3);
        -o-transform:scale(1.3);
    }

    #rewardModal .reward-tabs {
        margin: 0 auto;
        width: 210px;
    }

    .reward-tabs .tabs {
        height: 38px;
        margin: 10px auto;
        padding-left: 0;
    }

    .reward-content ul {
        padding-left: 0 !important;
    }

    .reward-tabs .tabs .tab {
        height: 38px;
        line-height: 38px;
    }

    .reward-tabs .tab a {
        color: #fff;
        background-color: #ccc;
    }

    .reward-tabs .tab a:hover {
        background-color: #ccc;
        color: #fff;
    }

    .reward-tabs .wechat-tab .active {
        color: #fff !important;
        background-color: #22AB38 !important;
    }

    .reward-tabs .alipay-tab .active {
        color: #fff !important;
        background-color: #019FE8 !important;
    }

    .reward-tabs .reward-img {
        width: 210px;
        height: 210px;
    }
</style>

<div id="reward">
    <a href="#rewardModal" class="reward-link modal-trigger btn-floating btn-medium waves-effect waves-light red">赏</a>

    <!-- Modal Structure -->
    <div id="rewardModal" class="modal">
        <div class="modal-content">
            <a class="close modal-close"><i class="fas fa-times"></i></a>
            <h4 class="reward-title">你的赏识是我前进的动力</h4>
            <div class="reward-content">
                <div class="reward-tabs">
                    <ul class="tabs row">
                        <li class="tab col s6 alipay-tab waves-effect waves-light"><a href="#alipay">支付宝</a></li>
                        <li class="tab col s6 wechat-tab waves-effect waves-light"><a href="#wechat">微 信</a></li>
                    </ul>
                    <div id="alipay">
                        <img src="/medias/reward/alipay.jpg" class="reward-img" alt="支付宝打赏二维码">
                    </div>
                    <div id="wechat">
                        <img src="/medias/reward/wechat.png" class="reward-img" alt="微信打赏二维码">
                    </div>
                </div>
            </div>
        </div>
    </div>
</div>

<script>
    // Once the DOM is ready, run the tabs plugin on every ".tabs" element.
    $(function () {
        const $tabStrips = $('.tabs');
        $tabStrips.tabs();
    });
</script>

            
        </div>
    </div>

    

    

    

    
    <div class="livere-card card" data-aos="fade-up">
    <!-- 来必力City版安装代码 -->
    <div id="lv-container" class="card-content" data-id="city" data-uid="MTAyMC81NzU4NC8zNDA0OA==">
        <script type="text/javascript">
            (function (doc, tagName) {
                // Bail out when a global LivereTower function already exists.
                if (typeof LivereTower === 'function') {
                    return;
                }

                // Create an async <script> pointing at the LiveRe embed bundle.
                const firstScript = doc.getElementsByTagName(tagName)[0];
                const loader = doc.createElement(tagName);
                loader.src = 'https://cdn-city.livere.com/js/embed.dist.js';
                loader.async = true;

                // Insert it immediately before the first <script> in the document.
                firstScript.parentNode.insertBefore(loader, firstScript);
            })(document, 'script');
        </script>
        <noscript>为正常使用来必力评论功能请激活JavaScript。</noscript>
    </div>
    <!-- City版安装代码已完成 -->
</div>
    

    

    

    

    

<article id="prenext-posts" class="prev-next articles">
    <div class="row article-row">
        
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge left-badge text-color">
                <i class="fas fa-chevron-left"></i>&nbsp;Previous</div>
            <div class="card">
                <a href="/2019/11/08/cad-ji-chu/">
                    <div class="card-image">
                        
                        
                        <img src="/medias/featureimages/2.jpg" class="responsive-img" alt="cad基础">
                        
                        <span class="card-title">cad基础</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        
                            CAD，全称为管理软件计算机辅助设计（Management Software Computer Aided Design,MS-CAD）是指运用计算机软件在图形化开发界面上进行管理软件的设计，通过设计管理软件的流程结构、数据结构，最终通过计算机软件系统的自动数据加载、解析生成能够独立应用的管理软件的过程。
                        
                    </div>
                    <div class="publish-info">
                        <span class="publish-date">
                            <i class="far fa-clock fa-fw icon-date"></i>2019-11-08
                        </span>
                        <span class="publish-author">
                            
                            <i class="fas fa-bookmark fa-fw icon-category"></i>
                            
                            <a href="/categories/%E6%9D%82%E9%A1%B9%E6%8A%80%E6%9C%AF/" class="post-category">
                                    杂项技术
                                </a>
                            
                            
                        </span>
                    </div>
                </div>
                
                <div class="card-action article-tags">
                    
                    <a href="/tags/cad%E5%9F%BA%E7%A1%80/">
                        <span class="chip bg-color">cad基础</span>
                    </a>
                    
                </div>
                
            </div>
        </div>
        
        
        <div class="article col s12 m6" data-aos="fade-up">
            <div class="article-badge right-badge text-color">
                Next&nbsp;<i class="fas fa-chevron-right"></i>
            </div>
            <div class="card">
                <a href="/2019/03/11/python-ji-chu-yu-fa/">
                    <div class="card-image">
                        
                        
                        <img src="/medias/featureimages/5.jpg" class="responsive-img" alt="python基础知识">
                        
                        <span class="card-title">python基础知识</span>
                    </div>
                </a>
                <div class="card-content article-content">
                    <div class="summary block-with-text">
                        
                            爬虫一般指网络爬虫。 网络爬虫（又称为网页蜘蛛，网络机器人，在FOAF社区中间，更经常的称为网页追逐者），是一种按照一定的规则，自动地抓取万维网信息的程序或者脚本。
                        
                    </div>
                    <div class="publish-info">
                            <span class="publish-date">
                                <i class="far fa-clock fa-fw icon-date"></i>2019-03-11
                            </span>
                        <span class="publish-author">
                            
                            <i class="fas fa-bookmark fa-fw icon-category"></i>
                            
                            <a href="/categories/python/" class="post-category">
                                    python
                                </a>
                            
                            
                        </span>
                    </div>
                </div>
                
                <div class="card-action article-tags">
                    
                    <a href="/tags/python/">
                        <span class="chip bg-color">python</span>
                    </a>
                    
                </div>
                
            </div>
        </div>
        
    </div>
</article>

</div>



<!-- 代码块功能依赖 -->
<script type="text/javascript" src="/libs/codeBlock/codeBlockFuction.js"></script>

<!-- 代码语言 -->

<script type="text/javascript" src="/libs/codeBlock/codeLang.js"></script>


<!-- 代码块复制 -->

<script type="text/javascript" src="/libs/codeBlock/codeCopy.js"></script>


<!-- 代码块收缩 -->

<script type="text/javascript" src="/libs/codeBlock/codeShrink.js"></script>


    </div>
    <div id="toc-aside" class="expanded col l3 hide-on-med-and-down">
        <div class="toc-widget card" style="background-color: white;">
            <div class="toc-title"><i class="far fa-list-alt"></i>&nbsp;&nbsp;TOC</div>
            <div id="toc-content"></div>
        </div>
    </div>
</div>

<!-- TOC 悬浮按钮. -->

<div id="floating-toc-btn" class="hide-on-med-and-down">
    <a class="btn-floating btn-large bg-color">
        <i class="fas fa-list-ul"></i>
    </a>
</div>


<script src="/libs/tocbot/tocbot.min.js"></script>
<script>
    $(function () {
        const $window = $(window);
        const HEADING_ID_PREFIX = 'toc-heading-';
        const EXPANDED_CLASS = 'expanded';

        // Build the table of contents from the article's h2-h4 headings.
        tocbot.init({
            tocSelector: '#toc-content',
            contentSelector: '#articleContent',
            headingsOffset: -($window.height() * 0.4 - 45),
            collapseDepth: Number('0'),
            headingSelector: 'h2, h3, h4'
        });

        // Replace every TOC link target with a sequential ASCII anchor
        // (toc-heading-1, toc-heading-2, ...) so Chinese titles work as links.
        $('#toc-content a').each(function (index, link) {
            $(link).attr('href', '#' + HEADING_ID_PREFIX + (index + 1));
        });

        // Give the article's direct h2/h3/h4 children the matching sequential ids.
        $('#articleContent').children('h2, h3, h4').each(function (index, heading) {
            $(heading).attr('id', HEADING_ID_PREFIX + (index + 1));
        });

        // Pin the TOC widget once the page has been scrolled past the threshold.
        const pinThreshold = parseInt($window.height() * 0.4 - 64);
        const $tocWidget = $('.toc-widget');
        $window.on('scroll', function () {
            const scrolled = $window.scrollTop();
            $tocWidget.toggleClass('toc-fixed', scrolled > pinThreshold);
        });

        /* Resize the target element to the source element's width plus a
           width-dependent allowance; does nothing if the source is missing. */
        function fixPostCardWidth(srcId, targetId) {
            const $source = $('#' + srcId);
            if ($source.length === 0) {
                return;
            }

            const width = $source.width();
            let allowance;
            if (width >= 450) {
                allowance = 21;
            } else if (width >= 350) {
                allowance = 18;
            } else if (width >= 300) {
                allowance = 16;
            } else {
                allowance = 14;
            }
            $('#' + targetId).width(width + allowance);
        }

        // The floating button expands or collapses the TOC sidebar and switches
        // the main column's 'l9' class accordingly.
        const $tocAside = $('#toc-aside');
        const $mainContent = $('#main-content');
        $('#floating-toc-btn .btn-floating').on('click', function () {
            if (!$tocAside.hasClass(EXPANDED_CLASS)) {
                $tocAside.addClass(EXPANDED_CLASS).show();
                $mainContent.addClass('l9');
            } else {
                $tocAside.removeClass(EXPANDED_CLASS).hide();
                $mainContent.removeClass('l9');
            }
            fixPostCardWidth('artDetail', 'prenext-posts');
        });

    });
</script>

    

</main>




    <footer class="page-footer bg-color">
    
        <link rel="stylesheet" href="/libs/aplayer/APlayer.min.css">
<style>
    .aplayer .aplayer-lrc p {
        
        font-size: 12px;
        font-weight: 700;
        line-height: 16px !important;
    }

    .aplayer .aplayer-lrc p.aplayer-lrc-current {
        
        font-size: 15px;
        color: blue;
    }

    
    .aplayer.aplayer-fixed.aplayer-narrow .aplayer-body {
        left: -66px !important;
    }

    .aplayer.aplayer-fixed.aplayer-narrow .aplayer-body:hover {
        left: 0px !important;
    }

    
</style>
<div class="">
    
    <div class="row">
        <meting-js class="col l8 offset-l2 m10 offset-m1 s12"
                   server="netease"
                   type="playlist"
                   id="2477330090"
                   fixed='true'
                   autoplay='false'
                   theme='blue'
                   loop='all'
                   order='random'
                   preload='auto'
                   volume='0.7'
                   list-folded='true'
        >
        </meting-js>
    </div>
</div>

<script src="/libs/aplayer/APlayer.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/meting@2/dist/Meting.min.js"></script>

    
    <div class="container row center-align" style="margin-bottom: 15px !important;">
        <div class="col s12 m8 l8 copy-right">
            Copyright&nbsp;&copy;
            
                <span id="year">2022</span>
            <a href="/about" target="_blank">凡诚</a>
            &nbsp;&nbsp;<a href="https://hexo.io/" target="_blank"></a>
            |&nbsp;Theme&nbsp;<a href="https://github.com/blinkfox/hexo-theme-matery" target="_blank">Matery</a>
            <br>
            
            &nbsp;<i class="fas fa-chart-area"></i>&nbsp;站点总字数:&nbsp;<span
                class="white-color">607.1k</span>&nbsp;字
            
            
            
            
            
            
            <span id="busuanzi_container_site_pv">
                |&nbsp;<i class="far fa-eye"></i>&nbsp;总访问量:&nbsp;<span id="busuanzi_value_site_pv"
                    class="white-color"></span>&nbsp;次
            </span>
            
            
            <span id="busuanzi_container_site_uv">
                |&nbsp;<i class="fas fa-users"></i>&nbsp;总访问人数:&nbsp;<span id="busuanzi_value_site_uv"
                    class="white-color"></span>&nbsp;人
            </span>
            
            <br>
            
            <span id="sitetime">载入运行时间...</span>
            <script>
                /**
                 * Refresh the footer's copyright year and the "site has been running
                 * for ..." counter.
                 *
                 * The launch moment and the current local wall-clock moment are both
                 * passed through Date.UTC, so their difference is a plain millisecond
                 * count. Years are counted as fixed 365-day spans; leap days are not
                 * accounted for.
                 */
                function siteTime() {
                    var seconds = 1000;
                    var minutes = seconds * 60;
                    var hours = minutes * 60;
                    var days = hours * 24;
                    var years = days * 365;
                    var today = new Date();
                    // Launch moment, written with a calendar (1-12) month.
                    var startYear = "2022";
                    var startMonth = "11";
                    var startDate = "7";
                    var startHour = "0";
                    var startMinute = "0";
                    var startSecond = "0";
                    var todayYear = today.getFullYear();
                    var todayMonth = today.getMonth() + 1;
                    var todayDate = today.getDate();
                    var todayHour = today.getHours();
                    var todayMinute = today.getMinutes();
                    var todaySecond = today.getSeconds();
                    // Date.UTC expects a zero-based month (0 = January). Passing the
                    // calendar month unchanged shifts both dates one month forward, and
                    // days that do not exist in that later month (e.g. the 31st) roll
                    // over, which makes the counter jump. Convert to zero-based here.
                    var t1 = Date.UTC(startYear, startMonth - 1, startDate, startHour, startMinute, startSecond);
                    var t2 = Date.UTC(todayYear, todayMonth - 1, todayDate, todayHour, todayMinute, todaySecond);
                    var diff = t2 - t1;
                    // Peel off whole years, then days, hours, minutes and seconds.
                    var diffYears = Math.floor(diff / years);
                    var diffDays = Math.floor((diff / days) - diffYears * 365);
                    var diffHours = Math.floor((diff - (diffYears * 365 + diffDays) * days) / hours);
                    var diffMinutes = Math.floor((diff - (diffYears * 365 + diffDays) * days - diffHours * hours) /
                        minutes);
                    var diffSeconds = Math.floor((diff - (diffYears * 365 + diffDays) * days - diffHours * hours -
                        diffMinutes * minutes) / seconds);
                    if (startYear == todayYear) {
                        // Still in the launch year: show a single year and omit the year count.
                        document.getElementById("year").innerHTML = todayYear;
                        document.getElementById("sitetime").innerHTML = "本站已安全运行 " + diffDays + " 天 " + diffHours +
                            " 小时 " + diffMinutes + " 分钟 " + diffSeconds + " 秒";
                    } else {
                        document.getElementById("year").innerHTML = startYear + " - " + todayYear;
                        document.getElementById("sitetime").innerHTML = "本站已安全运行 " + diffYears + " 年 " + diffDays +
                            " 天 " + diffHours + " 小时 " + diffMinutes + " 分钟 " + diffSeconds + " 秒";
                    }
                }
                // Render once right away so the placeholder text is replaced without
                // waiting for the first tick, then refresh every second.
                siteTime();
                setInterval(siteTime, 1000);
            </script>
            
            <br>
            
        </div>
        <div class="col s12 m4 l4 social-link social-statis">
    <a href="https://github.com/fanshicheng" class="tooltipped" target="_blank" data-tooltip="访问我的GitHub" data-position="top" data-delay="50">
        <i class="fab fa-github"></i>
    </a>



    <a href="mailto:2639144944@qq.com" class="tooltipped" target="_blank" data-tooltip="邮件联系我" data-position="top" data-delay="50">
        <i class="fas fa-envelope-open"></i>
    </a>







    <a href="tencent://AddContact/?fromId=50&fromSubId=1&subcmd=all&uin=2639144944" class="tooltipped" target="_blank" data-tooltip="QQ联系我: 2639144944" data-position="top" data-delay="50">
        <i class="fab fa-qq"></i>
    </a>







</div>
    </div>
</footer>

<div class="progress-bar"></div>


    <!-- 搜索遮罩框 -->
<!-- Search overlay: the input is filtered client-side by the inline search script, which
     renders matches into #searchResult. -->
<div id="searchModal" class="modal">
    <div class="modal-content">
        <div class="search-header">
            <span class="title"><i class="fas fa-search" aria-hidden="true"></i>&nbsp;&nbsp;Search</span>
            <!-- A placeholder is not an accessible name, so the field is labelled explicitly. -->
            <input type="search" id="searchInput" name="s" placeholder="Please enter a search keyword"
                   class="search-input" aria-label="Search">
        </div>
        <div id="searchResult"></div>
    </div>
</div>

<script type="text/javascript">
$(function () {
    /**
     * Escape characters that are significant in HTML text and attribute values,
     * so plain text (titles, URLs) can be concatenated into markup.
     */
    var escapeHtml = function (text) {
        var entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
        return String(text).replace(/[&<>"']/g, function (ch) {
            return entities[ch];
        });
    };

    /**
     * Escape regular-expression metacharacters so a user-typed keyword is
     * matched literally (e.g. "c++" or "(" must not throw in `new RegExp`).
     */
    var escapeRegExp = function (text) {
        return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    };

    /**
     * Load the search index and wire up live, client-side searching.
     *
     * @param {string} path        URL of the XML search index (entries with title/content/url).
     * @param {string} search_id   id of the text input that receives the query.
     * @param {string} content_id  id of the element the result list is rendered into.
     */
    var searchFunc = function (path, search_id, content_id) {
        'use strict';
        $.ajax({
            url: path,
            dataType: "xml",
            success: function (xmlResponse) {
                // Flatten every <entry> of the index into a plain object.
                var datas = $("entry", xmlResponse).map(function () {
                    return {
                        title: $("title", this).text(),
                        content: $("content", this).text(),
                        url: $("url", this).text()
                    };
                }).get();
                var $input = document.getElementById(search_id);
                var $resultContent = document.getElementById(content_id);
                $input.addEventListener('input', function () {
                    var str = '<ul class="search-result-list">';
                    // Split on whitespace/hyphens and drop empty fragments: an empty keyword
                    // would match every entry and every position while highlighting.
                    var keywords = this.value.trim().toLowerCase().split(/[\s\-]+/).filter(function (keyword) {
                        return keyword.length > 0;
                    });
                    $resultContent.innerHTML = "";
                    if (keywords.length === 0) {
                        return;
                    }
                    // One combined, escaped pattern highlights all keywords in a single pass,
                    // so a later keyword can never match inside markup inserted for an earlier
                    // one. Longer keywords come first so they win over their own prefixes.
                    var highlightPattern = new RegExp(
                        keywords.slice().sort(function (a, b) {
                            return b.length - a.length;
                        }).map(escapeRegExp).join('|'),
                        'gi'
                    );
                    // perform local searching
                    datas.forEach(function (data) {
                        var title = data.title.trim();
                        // Content is HTML in the index; strip tags before matching/excerpting.
                        var content = data.content.trim().replace(/<[^>]+>/g, "");
                        var data_title = title.toLowerCase();
                        var data_content = content.toLowerCase();
                        var data_url = data.url;
                        data_url = data_url.indexOf('/') === 0 ? data.url : '/' + data_url;
                        var first_occur = -1;
                        // Only match articles with non-empty titles and contents; entries
                        // missing either are excluded instead of matching every query.
                        var isMatch = data_title !== '' && data_content !== '';
                        if (isMatch) {
                            // Every keyword must occur in the title or the content.
                            keywords.forEach(function (keyword, i) {
                                var index_title = data_title.indexOf(keyword);
                                var index_content = data_content.indexOf(keyword);
                                if (index_title < 0 && index_content < 0) {
                                    isMatch = false;
                                } else if (i === 0) {
                                    // The excerpt is centred on the first keyword; fall back to
                                    // the start of the content for a title-only match.
                                    first_occur = index_content < 0 ? 0 : index_content;
                                }
                            });
                        }
                        if (!isMatch) {
                            return;
                        }
                        // show search results (title keeps its original casing and is escaped)
                        str += "<li><a href='" + escapeHtml(data_url) + "' class='search-result-title'>" +
                            escapeHtml(title) + "</a>";
                        if (first_occur >= 0) {
                            // cut out roughly 100 characters around the first occurrence
                            var start = Math.max(first_occur - 20, 0);
                            var end = start === 0 ? 100 : first_occur + 80;
                            if (end > content.length) {
                                end = content.length;
                            }
                            // substring takes (start, end); substr would treat `end` as a length.
                            var match_content = content.substring(start, end);
                            // highlight all keywords, preserving the casing of the matched text
                            match_content = match_content.replace(highlightPattern, "<em class=\"search-keyword\">$&</em>");

                            str += "<p class=\"search-result\">" + match_content + "...</p>";
                        }
                        str += "</li>";
                    });
                    str += "</ul>";
                    $resultContent.innerHTML = str;
                });
            }
        });
    };

    searchFunc('/search.xml', 'searchInput', 'searchResult');
});
</script>

    <!-- 回到顶部按钮 -->
<!-- Back-to-top control. It contains only an icon, so aria-label supplies the accessible
     name and the icon itself is hidden from assistive technology. -->
<div id="backTop" class="top-scroll">
    <a class="btn-floating btn-large waves-effect waves-light" href="#!" aria-label="回到顶部">
        <i class="fas fa-arrow-up" aria-hidden="true"></i>
    </a>
</div>

    <script type="text/javascript">
        // Remember the real page title; `st` holds the pending "restore title" timer.
        var OriginTitile = document.title, st;
        // Show a teaser title while the tab is hidden, a greeting when the visitor
        // returns, and put the real title back three seconds after that.
        document.addEventListener("visibilitychange", function () {
            if (document.hidden) {
                document.title = "(Ő∀Ő3)ノ";
                // Cancel a restore scheduled by a previous return to the tab.
                clearTimeout(st);
            } else {
                document.title = "ヽ(●-`Д´-)ノ欢迎回来";
                st = setTimeout(function () {
                    document.title = OriginTitile;
                }, 3000);
            }
        });
    </script>
    <!-- <script src="/js/cursor.js"></script> -->
    <script type="text/javascript">
        // Only enable the falling-petal effect on desktop-sized viewports.
        var windowWidth = $(window).width();
        if (windowWidth > 768) {
            // Inject the script through the DOM instead of document.write, which is
            // parser-blocking and discouraged by the HTML specification.
            var sakuraScript = document.createElement('script');
            sakuraScript.src = '/js/sakura.js';
            document.body.appendChild(sakuraScript);
        }
        </script>
    <script src="/libs/materialize/materialize.min.js"></script>
    <script src="/libs/masonry/masonry.pkgd.min.js"></script>
    <script src="/libs/aos/aos.js"></script>
    <script src="/libs/scrollprogress/scrollProgress.min.js"></script>
    <script src="/libs/lightGallery/js/lightgallery-all.min.js"></script>
    <script src="/js/matery.js"></script>

    <!-- Baidu Analytics -->

    <!-- Baidu Push -->

<script>
    // Baidu link-submit: load the push script that matches the page's protocol
    // and insert it ahead of the first <script> element in the document.
    (function () {
        var pushScript = document.createElement('script');
        var scheme = window.location.protocol.split(':')[0];
        pushScript.src = scheme === 'https'
            ? 'https://zz.bdstatic.com/linksubmit/push.js'
            : 'http://push.zhanzhang.baidu.com/push.js';
        var firstScript = document.getElementsByTagName("script")[0];
        firstScript.parentNode.insertBefore(pushScript, firstScript);
    })();
</script>

    
    
    <script async src="/libs/others/busuanzi.pure.mini.js"></script>
    

    

    
    <script>
        // DaoVoice widget bootstrap.
        // Arguments: i = window, s = document, o = "script", g = widget script URL,
        // r = name of the global function ("daovoice"); a and m are used as locals.
        (function (i, s, o, g, r, a, m) {
            // Record the name of the global under window.DaoVoiceObject.
            i["DaoVoiceObject"] = r;
            // Unless window[r] already exists, define it as a stub that appends each
            // call's arguments to the queue window[r].q; then store a load timestamp in .l.
            i[r] = i[r] || function () {
                (i[r].q = i[r].q || []).push(arguments)
            }, i[r].l = 1 * new Date();
            // Create the widget <script> element and look up the first <script> in the page.
            a = s.createElement(o), m = s.getElementsByTagName(o)[0];
            a.async = 1;
            a.src = g;
            a.charset = "utf-8";
            // Insert the widget script ahead of the first existing script.
            m.parentNode.insertBefore(a, m)
        })(window, document, "script", ('https:' == document.location.protocol ? 'https:' : 'http:') +
            "//widget.daovoice.io/widget/6984b559.js", "daovoice")
        // These calls go through window.daovoice as set up above (the queueing stub,
        // unless a `daovoice` global was already defined).
        daovoice('init', {
            app_id: "377cb7de"
        });
        daovoice('update');
    </script>
    

	
    
    <script type="text/javascript" color="0,0,255"
        pointColor="0,0,255" opacity='0.7'
        zIndex="-1" count="99"
        src="/libs/background/canvas-nest.js"></script>
    

    

    

    
    <script src="/libs/instantpage/instantpage.js" type="module"></script>
    
    <!-- 冒泡 -->
    
<!-- Click "fireworks" effect: a full-viewport canvas that ignores pointer events,
     loaded together with anime.js and the local fireworks script. -->
<canvas class="fireworks" style="position: fixed;left: 0;top: 0;z-index: 1; pointer-events: none;" ></canvas>
<!-- Loaded over explicit https rather than a protocol-relative URL, so the third-party
     script is never fetched in clear text.
     NOTE(review): the bootcss CDN has been reported as affected by the polyfill.io
     supply-chain incident; consider self-hosting anime.min.js under /libs like the
     other libraries. -->
<script type="text/javascript" src="https://cdn.bootcss.com/animejs/2.2.0/anime.min.js"></script>
<script type="text/javascript" src="/js/fireworks.js"></script>


</body>

</html>
